This database has been collected from the Data Worldbank (https://data.worldbank.org/topic/environment) to extract and visualize forest area (% of land area) data.
License CC-BY 4.0:
### Load Database and Libraries
Code
database_loc =R"C:\Users\ERMCl\Documents\FreelanceWork\Portfolio\Forest_Area_Internationally_2024.0125\API_6_DS2_en_csv_v2_6303688.csv"import duckdbconnection = duckdb.connect(database=':memory:')import pandas as pdimport geopandas as gpdimport plotly.graph_objects as goimport plotly.express as pxcreate_table_total_total_total_total_total_total_query ="""DROP TABLE ForestDB;"""
Complete Environmental Data From data.worldbank.org
Question 1: Which country has lost the most Area of Forest from 1993 to 2021?
Code
query ="""SELECT DISTINCT Country_Name AS 'Country Name', Indicator_Name AS 'Indicator Name', y1993, y1994, y1995, y1996, y1997, y1998, y1999, y2000, y2001, y2002, y2003, y2004, y2005, y2006, y2007, y2008, y2009, y2010, y2011, y2012, y2013, y2014, y2015, y2016, y2017, y2018, y2019, y2020, y2021, (y2021 - y1993) AS 'Total % Forest Area Gained or Lost (1993-2021)'FROM ForestDBWHERE Indicator_Name ='Forest area (% of land area)'ORDER BY (y2021 - y1993) ASCLIMIT 5;"""table = connection.execute(query).df()table
Country Name
Indicator Name
y1993
y1994
y1995
y1996
y1997
y1998
y1999
y2000
...
y2013
y2014
y2015
y2016
y2017
y2018
y2019
y2020
y2021
Total % Forest Area Gained or Lost (1993-2021)
0
Nicaragua
Forest area (% of land area)
51
50
49
48
47
47
46
45
...
33
33
32
32
31
30
29
28
27
-24
1
Paraguay
Forest area (% of land area)
62
62
61
60
60
59
59
58
...
46
45
44
44
43
42
41
41
40
-22
2
Northern Mariana Islands
Forest area (% of land area)
72
72
71
71
71
70
70
69
...
65
64
64
53
53
53
53
53
53
-19
3
Cambodia
Forest area (% of land area)
62
62
62
62
61
61
61
61
...
54
52
50
49
48
47
47
46
45
-17
4
Gambia, The
Forest area (% of land area)
39
39
38
38
37
36
36
35
...
28
27
27
26
26
25
25
24
23
-16
5 rows × 32 columns
Reorganize previous table .melt splits several columns into just two: 1) the variable and 2) value
Code
table_melted = table.melt(id_vars=['Country Name', 'Indicator Name', 'Total % Forest Area Gained or Lost (1993-2021)'], var_name='Year', value_name='% Forest Area')table_melted['Year'] = table_melted['Year'].str.extract('(\d+)', expand=False).astype(int)table_melted
Country Name
Indicator Name
Total % Forest Area Gained or Lost (1993-2021)
Year
% Forest Area
0
Nicaragua
Forest area (% of land area)
-24
1993
51
1
Paraguay
Forest area (% of land area)
-22
1993
62
2
Northern Mariana Islands
Forest area (% of land area)
-19
1993
72
3
Cambodia
Forest area (% of land area)
-17
1993
62
4
Gambia, The
Forest area (% of land area)
-16
1993
39
...
...
...
...
...
...
140
Nicaragua
Forest area (% of land area)
-24
2021
27
141
Paraguay
Forest area (% of land area)
-22
2021
40
142
Northern Mariana Islands
Forest area (% of land area)
-19
2021
53
143
Cambodia
Forest area (% of land area)
-17
2021
45
144
Gambia, The
Forest area (% of land area)
-16
2021
23
145 rows × 5 columns
Create a linear graph to visualize the 5 countries that have lost the most forest area from 1993 to 2021
Code
# Create the plotfig = px.line(table_melted, x='Year', y='% Forest Area', color='Country Name', line_shape='linear', width=1000, height=600, hover_name="Country Name", custom_data=['Total % Forest Area Gained or Lost (1993-2021)'] )# Gray out all linesfig.update_traces(line=dict(color='lightgray'))# Define Country with the highest total loss of Forest Area from 1993 to 2021highest_loss_country = table_melted.loc[table_melted['Total % Forest Area Gained or Lost (1993-2021)'].idxmin()]['Country Name']# Add annotations to the country with the highest Forest lossname_data = table_melted[table_melted['Country Name'] == highest_loss_country].iloc[10]fig.add_annotation(x=name_data['Year'], y=name_data['% Forest Area'], text=f"<b>{highest_loss_country}<b>", showarrow=False, yshift=20, font=dict(color='white'))loss_data_point = table_melted[(table_melted['Country Name'] == highest_loss_country) & (table_melted['Year'] == table_melted['Year'].max())]fig.add_annotation(x=loss_data_point['Year'].values[0], y=loss_data_point['% Forest Area'].values[0], text=f"<b>Total % Lost: {loss_data_point['Total % Forest Area Gained or Lost (1993-2021)'].values[0]}%<b>", showarrow=False, yshift=25, font=dict(color='white'))# Edit hover ---> Research how to edit hover!!! current hover is not correcthover_template ="<b>%{hovertext}</b><br>" \"Year: %{x}<br>" \"Forest Area: %{y}% <br>" \"<i>Total Area Loss: %{customdata}% <i><extra></extra>"# " <br>" \ -> for added enterfig.update_traces(hovertemplate=hover_template)# Edit Layout of Graphfig.update_traces(hoverlabel=dict(font=dict(color='black'), bgcolor='lightgray')) #hoverdatafig.update_traces(selector={'name': highest_loss_country}, line=dict(color='red')) #selector hover data#fig.update_traces(hoverlabel=dict(font=dict(color='rgb(102,0,0)')), selector={'name': highest_loss_country})fig.update_traces(hoverlabel=dict(font=dict(color='white'), bgcolor='rgb(128,0,0)'), selector={'name': highest_loss_country})fig.update_xaxes(showgrid=False, range=[1993, 2021])fig.update_yaxes(showgrid=False, range=[0, 100])fig.update_layout(showlegend=False, title_x=0.5, title=dict(text='<b>The Country With The Highest Loss of Forest Area From 1993 to 2021<b>', font=dict(size=20)), paper_bgcolor='rgb(17,17,17)', plot_bgcolor='rgb(17,17,17)', font=dict(color='white'))#Call for graphfig.show()
Add Animation to the graph
Code
import plotly.express as pximport plotly.graph_objects as go# Static Graphfig = px.line(table_melted, x='Year', y='% Forest Area', color='Country Name', line_shape='linear', width=1000, height=600, hover_name="Country Name", custom_data=['Total % Forest Area Gained or Lost (1993-2021)'] )fig.update_traces(line=dict(color='lightgray'))highest_loss_country = table_melted.loc[table_melted['Total % Forest Area Gained or Lost (1993-2021)'].idxmin()]['Country Name']name_data = table_melted[table_melted['Country Name'] == highest_loss_country].iloc[10]fig.add_annotation(x=name_data['Year'], y=name_data['% Forest Area'], text=f"<b>{highest_loss_country}<b>", showarrow=False, yshift=20, font=dict(color='white'))loss_data_point = table_melted[(table_melted['Country Name'] == highest_loss_country) & (table_melted['Year'] == table_melted['Year'].max())]fig.add_annotation(x=loss_data_point['Year'].values[0], y=loss_data_point['% Forest Area'].values[0], text=f"<b>Total % Lost: {loss_data_point['Total % Forest Area Gained or Lost (1993-2021)'].values[0]}%<b>", showarrow=False, yshift=25, font=dict(color='white'))hover_template ="<b>%{hovertext}</b><br>" \"Year: %{x}<br>" \"Forest Area: %{y}% <br>" \"<i>Total Area Loss: %{customdata}% <i><extra></extra>"fig.update_traces(hovertemplate=hover_template)fig.update_traces(hoverlabel=dict(font=dict(color='black'), bgcolor='lightgray'))fig.update_traces(selector={'name': highest_loss_country}, line=dict(color='red'))fig.update_traces(hoverlabel=dict(font=dict(color='white'), bgcolor='rgb(128,0,0)'), selector={'name': highest_loss_country})fig.update_xaxes(showgrid=False, range=[1993, 2021])fig.update_yaxes(showgrid=False, range=[0, 100])fig.update_layout(showlegend=False, title_x=0.5, title=dict(text='<b>The Country With The Highest Loss of Forest Area From 1993 to 2021<b>', font=dict(size=20)), paper_bgcolor='rgb(17,17,17)', plot_bgcolor='rgb(17,17,17)', font=dict(color='white'))# Add frames for animationframes = []for year insorted(table_melted['Year'].unique()): frame_data = table_melted[table_melted['Year'] <= year] frame_traces = [ go.Scatter( x=frame_data[frame_data['Country Name'] == country]['Year'], y=frame_data[frame_data['Country Name'] == country]['% Forest Area'], mode='lines', line=dict(color='red'if country == highest_loss_country else'gray'), name=country ) for country in frame_data['Country Name'].unique() ] frame_traces.append(go.Scatter( x=[max(frame_data['Year'])], y=[max(frame_data['% Forest Area'])], text=[str(year)], mode="text", showlegend=False )) frame = go.Frame(data=frame_traces, name=str(year)) frames.append(frame)fig.frames = frames# Add animation controlsfig.update_layout( updatemenus=[{'type': 'buttons','buttons': [{'label': 'Play','method': 'animate','args': [None, {'frame': {'duration': 300, 'redraw': True}, 'fromcurrent': True, 'transition': {'duration': 300, 'easing': 'linear'}}] }],'direction': 'left','showactive': False,'x': -0.05,'xanchor': 'right','y': 0,'yanchor': 'top' }], sliders=[{'steps': [{'method': 'animate', 'args': [[f'{year}'], {'frame': {'duration': 400, 'redraw': True}, 'mode': 'immediate','transition': {'duration': 200}}],'label': str(year)} for year insorted(table_melted['Year'].unique())] }])# Show the plotfig.show()
Create Full Table In Order to Create a WorldMap
Code
#Create database with only Indicator_Name ='Forest area (% of land area)'query2 ="""SELECT DISTINCT Country_Name AS 'Country Name', Indicator_Name AS 'Indicator Name', y1993, y1994, y1995, y1996, y1997, y1998, y1999, y2000, y2001, y2002, y2003, y2004, y2005, y2006, y2007, y2008, y2009, y2010, y2011, y2012, y2013, y2014, y2015, y2016, y2017, y2018, y2019, y2020, y2021, (y2021 - y1993) AS 'Total % Forest Area Gained or Lost (1993-2021)'FROM ForestDBWHERE Indicator_Name ='Forest area (% of land area)'ORDER BY (y2021 - y1993) DESC"""FullTable = connection.execute(query2).df()#FullTabletable_melted2 = FullTable.melt(id_vars=['Country Name', 'Indicator Name', 'Total % Forest Area Gained or Lost (1993-2021)'], var_name='Year', value_name='% Forest Area')table_melted2['Year'] = table_melted2['Year'].str.extract('(\d+)', expand=False).astype(int)table_melted2
Country Name
Indicator Name
Total % Forest Area Gained or Lost (1993-2021)
Year
% Forest Area
0
Bhutan
Forest area (% of land area)
18.0
1993
54.0
1
Viet Nam
Forest area (% of land area)
16.0
1993
31.0
2
Puerto Rico
Forest area (% of land area)
16.0
1993
40.0
3
Cuba
Forest area (% of land area)
11.0
1993
20.0
4
Dominican Republic
Forest area (% of land area)
10.0
1993
35.0
...
...
...
...
...
...
7709
Serbia
Forest area (% of land area)
NaN
2021
32.0
7710
Luxembourg
Forest area (% of land area)
NaN
2021
34.0
7711
Sint Maarten (Dutch part)
Forest area (% of land area)
NaN
2021
11.0
7712
Montenegro
Forest area (% of land area)
NaN
2021
61.0
7713
South Sudan
Forest area (% of land area)
NaN
2021
11.0
7714 rows × 5 columns
Global Forest Area % Throughout 1993-2021
Code
# Load world map GeoJSON fileworld_map = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))# Update the country names to match the one in world_maptable_melted2.loc[table_melted2['Country Name'] =='United States', 'Country Name'] ='United States of America'table_melted2.loc[table_melted2['Country Name'] =='Congo, Rep.', 'Country Name'] ='Congo'table_melted2.loc[table_melted2['Country Name'] =='Congo, Dem. Rep.', 'Country Name'] ='Dem. Rep. Congo'table_melted2.loc[table_melted2['Country Name'] =='Egypt, Arab Rep.', 'Country Name'] ='Egypt'table_melted2.loc[table_melted2['Country Name'] =='Yemen, Rep.', 'Country Name'] ='Yemen'table_melted2.loc[table_melted2['Country Name'] =='South Sudan', 'Country Name'] ='S. Sudan'table_melted2.loc[table_melted2['Country Name'] =='Russian Federation', 'Country Name'] ='Russia'table_melted2.loc[table_melted2['Country Name'] =='Venezuela, RB', 'Country Name'] ='Venezuela'table_melted2.loc[table_melted2['Country Name'] =='Central African Republic', 'Country Name'] ='Central African Rep.'table_melted2.loc[table_melted2['Country Name'] =='Dominican Republic', 'Country Name'] ='Dominican Rep.'table_melted2.loc[table_melted2['Country Name'] =='Bahamas, The', 'Country Name'] ='Bahamas'table_melted2.loc[table_melted2['Country Name'] =="Cote d'Ivoire", 'Country Name'] ="Côte d'Ivoire"table_melted2.loc[table_melted2['Country Name'] =='Iran, Islamic Rep.', 'Country Name'] ='Iran'table_melted2.loc[table_melted2['Country Name'] =='Syrian Arab Republic', 'Country Name'] ='Syria'table_melted2.loc[table_melted2['Country Name'] =='Turkiye', 'Country Name'] ='Turkey'table_melted2.loc[table_melted2['Country Name'] =='Viet Nam', 'Country Name'] ='Vietnam'table_melted2.loc[table_melted2['Country Name'] =='Lao PDR', 'Country Name'] ='Laos'table_melted2.loc[table_melted2['Country Name'] =="Korea, Dem. People's Rep.", 'Country Name'] ='North Korea'table_melted2.loc[table_melted2['Country Name'] =='Korea, Rep.', 'Country Name'] ='South Korea'table_melted2.loc[table_melted2['Country Name'] =='Solomon Islands', 'Country Name'] ='Solomon Is.'table_melted2.loc[table_melted2['Country Name'] =='S. Sudan', 'Country Name'] ='S. Sudan'#Missing Countries: S.Sudan and Côte d'Ivoire (present, but cannot add), Taiwan, Falkland Is., Antartica not in the table.# Assuming df is your DataFrame containing the forest area datamerged_data = world_map.merge(table_melted2, left_on='name', right_on='Country Name', how='left')fig = px.choropleth(merged_data, geojson=merged_data.geometry, locations=merged_data.index, color='Total % Forest Area Gained or Lost (1993-2021)', hover_name='name', hover_data=['Total % Forest Area Gained or Lost (1993-2021)'], color_continuous_scale='Inferno', #Inferno, Hot or Blackbody projection='natural earth')fig.update_geos(showcountries=True, countrycolor="lightgray", showcoastlines=True, coastlinecolor="white")fig.update_layout(title='<b>Total % Forest Area Lost (1993 - 2021) per Documented Country <b>', title_x=.5, coloraxis_colorbar=dict(title='Total % Forest Area Gained or Lost (1993-2021)'))hover_template ="<b>%{hovertext}</b><br>"+\"Total % Area Loss/Gain: %{customdata}%<br>"+\"<extra></extra>"fig.update_traces(hovertemplate=hover_template)fig.show()
C:\Users\ERMCl\AppData\Local\Temp\ipykernel_29876\3907168935.py:2: FutureWarning:
The geopandas.dataset module is deprecated and will be removed in GeoPandas 1.0. You can get the original 'naturalearth_lowres' data from https://www.naturalearthdata.com/downloads/110m-cultural-vectors/.